import sys
sys.path.append('../../')
from BasicLibraries import *
import pickle
import allocation_ga as af



#=== Model wrapper; its fit_model method is called once per weight further down
AM = af.AllocationGAD()

#==== Choose your population
# Common file-name suffix; a weight-specific prefix is prepended when the
# population file is read.
population_type_ = 'LandscapePopulation_0724_full.txt'
#====
#==== Binarisation quantile (for the robustness tests)
# Read from the first command-line argument. Only a missing argument
# (IndexError) triggers the fallback, so unrelated errors are not hidden.
try:
    BQL = sys.argv[1]
except IndexError:
    #=== If no input is given, then go for the default binarisation threshold
    BQL = str(0.75)


#=== Default choice to control for energy prices
energy_control = False
print('Binarisation threshold:', BQL)
print('Control for energy prices for robustness tests:', energy_control)
#=== The threshold is printed exactly as supplied and only then converted;
#=== a non-numeric argument raises ValueError here.
BQL = float(BQL)
ONEDRIVE = ''
#================
# Fixed seed so that the random draws of the test are reproducible
np.random.seed(5)
#================

#=== Labels handed to AM.fit_model as `dimension_`
I = [
    'association',
    'adoption of standards and rules',
    'assessment and measurement',
    'organizational structuring',
    'modification of procedures',
    'asset modification',
    'training',
    'r&d investments',
    'new products',
]
#=== Labels handed to AM.fit_model as `sdgs_number`
env_sdgs = [
    'water & energy',
    'cons. & prod.',
    'biodiversity',
]

#%% Random test
# For every weight FW the matching population file is read, a predictor is
# fitted through AM.fit_model, and 20000 randomly drawn rows are compared with
#   (a) a random permutation of the row's own `sdgs` entries, and
#   (b) random 0/1 allocations drawn with probability k * average_allocation.
# Each comparison stores the predicted value of the observed row minus the
# predicted value of the randomised row, plus the L1 distance between the two.
print('#======================')
print('Running the random test')
print('#======================')
randTEST, plabTEST = pd.DataFrame(), pd.DataFrame()

#=== Column layout of the result table; it is the same for every weight, so it
#=== is built once instead of inside the sampling loop.
allocation_multipliers = [1, 2]
ran = ['allocation_'+str(k).replace('.', '_') for k in allocation_multipliers]
adn = ['allocation_distance_'+str(k).replace('.', '_') for k in allocation_multipliers]
result_columns = ['Weight', 'rfyear', 'number of initiatives', 'permutation'] + ran + ['permutation_distance'] + adn
#=== Only these numeric columns are summarised. 'rfyear' holds column labels
#=== (strings), and taking the median of the full frame raises a TypeError on
#=== pandas >= 2.0, so the columns are selected *before* the median.
summary_columns = ['permutation'] + ran

for FW in [0., 0.25, 0.5, 0.75, 1.0]:
    print('Weight:', FW)
    #=== e.g. 0.25 -> '0_25_' + population_type_
    population_type = str(FW).replace('.', '_')+'_'+population_type_
    dt = pd.read_csv(population_type, low_memory=False)
    gvkey_list = list(np.unique(dt.gvkey))

    dat = dt.copy()
    dat = dat[dat.number_of_initiatives > 2]
    x, y, forst, sdgs, controls, names, reduced_gvkey_list, _, _ = AM.fit_model(
        dat,
        sdgs_number=env_sdgs,
        dimension_=I,
        gvkey_list=gvkey_list,
        model_type='RandomForest',
        normal_fit=False,
        energy_price_control=energy_control,
        binarization_quantile=BQL,
        FW=FW,
    )
    #=== Keep only index labels that occur more than 3 times in x
    idx = x.groupby(x.index).count()
    idx = idx[idx > 3].dropna().index
    x = x.loc[idx]
    y = y.loc[idx]

    #=== We need at least some initiatives
    xcut = x.copy()
    xcut['ni'] = xcut[sdgs].sum(axis=1)
    xcut = xcut[xcut['ni'] > 2]
    xcut = xcut.drop(columns=['ni'])
    average_allocation = xcut[sdgs].mean().mean()

    res = []

    #=== For each row, the label of the year column holding the row maximum
    #=== (presumably year dummies -- confirm against AM.fit_model)
    years = xcut[[str(n) for n in range(int(dat.rfyear.min())+1, int(dat.rfyear.max())+1)]].idxmax(axis=1)

    #=== Candidate row positions, built once; drawing from this array consumes
    #=== the random stream exactly like drawing from a freshly built list.
    row_positions = np.arange(len(xcut))

    for draw in range(20000):
        i = np.random.choice(row_positions)
        a = xcut.iloc[i]
        Y = years.iloc[i]
        n_initiatives = a[sdgs].sum()

        #==== The comparison should be with the expected performance
        EPERF = forst.predict(np.array(a).reshape(1, -1))[0]
        #====

        #========================================#
        #== Randomly permutate the initiatives ==#
        #========================================#
        if n_initiatives > 5:  ### Makes sense only if there is a substantial amount of initiatives
            #= shuffle the values but keep the original labels
            s = a.loc[sdgs].sample(frac=1).reset_index(drop=True)
            s.index = a.loc[sdgs].index
            d = pd.concat((s, a.loc[controls]))
            #= make sure that the variables are correctly sorted for the predictor
            d = d.loc[a.index]
            perdiff = (d[sdgs]-a[sdgs]).abs().sum()
            random_permutation = EPERF - forst.predict(np.array(d).reshape(1, -1))[0]
        else:
            random_permutation = np.nan
            perdiff = np.nan

        #=======================================#
        #== Randomly allocate the initiatives ==#
        #=======================================#
        random_allocation = []
        alldiff = []
        for k in allocation_multipliers:
            # NOTE(review): np.random.choice raises ValueError when
            # k*average_allocation exceeds 1 -- confirm this cannot occur.
            s = pd.DataFrame(np.random.choice([0, 1], size=(len(sdgs),), p=[1-k*average_allocation, k*average_allocation]), index=sdgs)[0]
            d = pd.concat((s, a.loc[controls]))
            #= make sure that the variables are correctly sorted for the predictor
            d = d.loc[a.index]
            alldiff.append((d[sdgs]-a[sdgs]).abs().sum())
            random_allocation.append(EPERF-forst.predict(np.array(d).reshape(1, -1))[0])

        tmp = [random_permutation] + random_allocation + [perdiff] + alldiff

        res.append([FW, Y, n_initiatives] + tmp)

        #=== Progress report: running medians every 1000 draws
        if draw % 1000 == 0:
            bu = pd.DataFrame(res, columns=result_columns)
            print(pd.DataFrame(bu[summary_columns].median()).transpose())
    res = pd.DataFrame(res, columns=result_columns)
    print(pd.DataFrame(res[summary_columns].median()).transpose())
    randTEST = pd.concat((randTEST, res))
    
#=== Store the collected results; the file name carries the binarisation
#=== threshold (e.g. random_test_0_75.pckl). The context manager makes sure the
#=== file is flushed and closed even if pickling fails.
with open('random_test_'+str(BQL).replace('.', '_')+'.pckl', 'wb') as pickle_file:
    pickle.dump([randTEST], pickle_file)

